******************************************************************************************************************
* Probabilistic disability employment results using 'simulate'
******************************************************************************************************************

capture program drop disemp_sim
* disemp_sim
*   Purpose: for each disability measure named in distypes(), (re)estimates
*     (a) a logit of `empvar' on disability-by-cluster, then margins of employment by disability status
*         within each cluster, plus the employment gap (level 0 minus level 1) via lincom; and
*     (b) a logit of the disability measure itself on cluster, then margins by cluster.
*     All point estimates, lower/upper CI bounds and sample sizes are concatenated into one row vector
*     that is posted as e(b), so that the program can be called from -simulate- / -bootstrap-.
*   Arguments:
*     varlist        - the FIRST variable is used as the cluster variable (e.g. country / country-wave)
*     distypes()     - list of measure stubs; for a stub X the variable used is the one passed in option Xvar()
*                      (e.g. distypes(dis predicted) uses disvar() and predictedvar())
*     empvar()       - binary employment outcome
*     controls(), controls2() - optional covariates (factor-variable notation allowed)
*     predictedvar() - if given, this variable is dropped and re-drawn on every call from p_predicted
*   Reads from outside the program:
*     variable p_predicted, globals ${disvar}, ${countryvar}, ${controlsmeans}
*   Side effects:
*     leaves variable rand1 (and `predictedvar') in the data, and matrices n_X, disemp_X_*, disgap_X_*,
*     dis_X_* in memory for each stub X
*   Returns:
*     e(b), e(cmd)="bootstrap", e(N)
program define disemp_sim, eclass 
		syntax [varlist] [if] [in] [pweight iweight], distypes(namelist) [subclustervar(name)] disvar(name) [predictedvar(name)] [fixedpredvar(name)] [wgvar(name)] [irtvar(name)] ///
			/**/ [allirtvar(name)] empvar(name) [controls(varlist fv)] [controls2(varlist fv)] 
		marksample touse
		tokenize 		"`varlist'"
		local clustervar	"`1'"			
		// for checking, if 'noisily' is switched on
		dis _newline(20) "Next iteration - distypes are `distypes', predictedvar is `predictedvar', prediction var wo random element is `fixedpredvar', IRT var is `irtvar', disvar is `disvar', empvar is `empvar', regimevar is removed, clustervar is `clustervar'"
		dis "Weight is `weight', touse is `touse'" 
		
		// Weight clause is only built when a weight was actually supplied, so that no empty [ ] is passed on
		local wgtspec ""
		if "`weight'"!=""		local wgtspec "[`weight' `exp']"
		
		****************************************************************************************************
		* PRELIMINARIES
		****************************************************************************************************
		// Creating the predicted disvar for each iteration - weights are constant, but random element needs to change in each bootstrap replication 
		capture drop rand1 
		gen rand1 = runiform()
		// predictedvar() is an optional argument, so the probabilistic measure is only (re)drawn when it was requested
		if "`predictedvar'"!="" {
			capture drop `predictedvar'
			gen 	`predictedvar' = 0 if						 ~missing(p_predicted)
			replace `predictedvar' = 1 if p_predicted  > rand1 & ~missing(p_predicted)
			label var `predictedvar' "Disability IN BOOTSTRAP - regression-based measure using ${disvar}"
		}

		
		****************************************************************************************************
		* ESTIMATION COMMANDS
		****************************************************************************************************
		local fullmatlist ""
		foreach distype	in `distypes' {
			
			* Need to exclude 'dis' for England/USA, which has no self-reported LLSI data
			* NB: the variable for the current measure is ``distype'var'; it is compared with the global ${disvar}.
			*     (Previously an undefined local was tested here, so the exclusion was never applied.)
			* The estimation sample is always restricted to `touse', rather than depending on the tempvar's internal name.
			if "``distype'var'"=="${disvar}"	local thisif "if `touse' & !inlist(${countryvar},51,61)"
			else								local thisif "if `touse' & !inlist(${countryvar},99)"		// kept as an if-clause so that it has the same form as above, but this doesn't exclude anyone
			dis "touse is `touse', if for this measure is `thisif'"

			// Command for DIS EMP GAP
			logit `empvar' ib(1).``distype'var'##i.`clustervar' `controls' `controls2' `thisif' `wgtspec'
				* Samples sizes in the matrix n_`distype' (e.g. n_predicted): [1,1] is obs in the employment model
				matrix n_`distype' =  `e(N)'
				levelsof `clustervar' if e(sample), local(wavelist_`distype')			// clusters (e.g. countries, regimes) in the estimation sample
				local nclust : word count `wavelist_`distype''
				matrix n_`distype' = n_`distype' , `nclust'								// [1,2] is the number of clusters
				* Margins
				margins i.``distype'var' , over(`clustervar') 	post at(${controlsmeans}) 	
					matrix disemp_`distype'_b  = e(b)
					matrix disemp_`distype'_ll = r(table)[rownumb(r(table), "ll"), 1...]
					matrix disemp_`distype'_ul = r(table)[rownumb(r(table), "ul"), 1...]
					* To get CIs for emp gap, need to use lincom as follows (only for SIMULATE, where need to get CIs)
					local i = 0
					local gapcolnames "" 
					foreach wave in `wavelist_`distype'' 	{
						local i = `i' + 1			// a consecutive counter, rather than the number of the country
						lincom _b[`wave'.`clustervar'#0.``distype'var'] - _b[`wave'.`clustervar'#1.``distype'var']
						if `i'==1 {
							// first cluster starts the three row vectors
							matrix disgap_`distype'_b  = r(estimate)
							matrix disgap_`distype'_ll = r(lb)
							matrix disgap_`distype'_ul = r(ub)
						}
						else {
							// later clusters are appended as extra columns
							matrix disgap_`distype'_b  = disgap_`distype'_b , r(estimate)
							matrix disgap_`distype'_ll = disgap_`distype'_ll, r(lb)
							matrix disgap_`distype'_ul = disgap_`distype'_ul, r(ub)
						}
						local gapcolnames "`gapcolnames' `wave'.`clustervar'"
					/*end country-wave loop*/				}	
					foreach stat in b ll ul		{
						matrix colnames disgap_`distype'_`stat' = `gapcolnames'
					/**/						}
					
			// Command for DIS ITSELF
			logit 				``distype'var'    i.`clustervar' `controls' `controls2' `thisif' `wgtspec'
				matrix  n_`distype' =  n_`distype', `e(N)'									// [1,3] is obs in the disability model, just as a check
				* Margins
				margins					, over(`clustervar') 	post at(${controlsmeans}) 	
				matrix dis_`distype'_b  = e(b)
				matrix dis_`distype'_ll = r(table)[rownumb(r(table), "ll"), 1...]
				matrix dis_`distype'_ul = r(table)[rownumb(r(table), "ul"), 1...]
				
			// Labelling results
			if "`fullmatlist'"==""	local fullmatlist "n_`distype'"
			else 					local fullmatlist "`fullmatlist', n_`distype'"
			foreach stat in b ll ul		{
				matrix coleq 	disgap_`distype'_`stat' = `distype'_disgap_`stat'
				matrix coleq 	disemp_`distype'_`stat' = `distype'_disemp_`stat'
				matrix coleq 	   dis_`distype'_`stat' = `distype'_dis_`stat'
				local fullmatlist "`fullmatlist', disgap_`distype'_`stat', disemp_`distype'_`stat', dis_`distype'_`stat'"		// 	The list of all matrices to include in the results
			/**/						}
			matrix coleq	     n_`distype'   = n_`distype'
			// Labelling the matrix of sample sizes
			matrix colnames n_`distype' = "obs_emp_`distype'" "cw_emp_`distype'" "obs_dis_`distype'" // labelling the matrix

		/*end distypes loop*/ }
		
		****************************************************************************************************
		* OUTPUTTING
		****************************************************************************************************
	
		// COMBINING VECTORS OF REGRESSION RESULTS INTO e(b)
		dis `"`fullmatlist'"'
		tempname bb
		matrix `bb' = `fullmatlist'
		matrix list `bb'
		ereturn post `bb' 
		ereturn local cmd="bootstrap"
		// For bootstrap, also need to post the sample size.
		// Uses the employment-regression sample size ([1,1]) of the 'predicted' measure when it was estimated;
		// otherwise falls back to the first measure in distypes() rather than failing on a missing matrix.
		local nsource : word 1 of `distypes'
		local predpos : list posof "predicted" in distypes
		if `predpos' > 0		local nsource "predicted"
		ereturn scalar N = n_`nsource'[1,1]		// (sample size for the distype regression is [1,3])
		
	// This was just used in testing, to see the full datasets that were being used in each iteration
	*save "${workingdata}/simfulldata_it${i}.dta", replace
	*global i = ${i} + 1	
end

